# Get starting time
starting_time <- Sys.time()

################################################################################
# List of packages that we need
packages <- c("ggplot2", "mia", "miaViz")

# Get packages that are already installed. Compare against the package names
# only (the row names of the installed.packages() matrix); matching against the
# whole matrix would also match cells of the other fields it contains.
packages_already_installed <- packages[
    packages %in% rownames(installed.packages())
]

# Get packages that need to be installed
packages_need_to_install <- setdiff(packages, packages_already_installed)

# Install BiocManager if it is not already installed, then attach it.
# requireNamespace() only checks availability; library() stops with an error
# if BiocManager still cannot be loaded after the installation attempt.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}
library("BiocManager")

# If there are packages that need to be installed, install them with
# BiocManager. Old packages are updated as well; ask = FALSE skips the
# confirmation prompt.
if (length(packages_need_to_install) > 0) {
    BiocManager::install(packages_need_to_install, ask = FALSE)
}

# Load all packages into the session. require() is used on purpose here: its
# logical return value shows which packages failed to load. vapply() guarantees
# that the result is a logical vector.
packages_loaded <- vapply(
    packages, require, logical(1), character.only = TRUE
)

# Stop if there are packages that were not successfully loaded
if (!all(packages_loaded)) {
    stop(
        "Error in loading packages into the session.",
        " Failed packages: ",
        paste(packages[!packages_loaded], collapse = ", ")
    )
}

################################################################################
# Additional setup

# Build the default figure theme: theme_bw() as the base, Arial as the text
# font, black axis lines, and no panel border or grid lines.
theme <- ggplot2::theme_bw() +
    ggplot2::theme(
        text = ggplot2::element_text(family = "Arial"),
        panel.border = ggplot2::element_blank(),
        panel.grid.major = ggplot2::element_blank(),
        panel.grid.minor = ggplot2::element_blank(),
        axis.line = ggplot2::element_line(colour = "black")
    )

# Make it the default theme for the plots created after this point
ggplot2::theme_set(theme)

EuroBioC2023

Presenter information

All authors are affiliated with the Turku Data Science Group at the University of Turku, Finland.


Learning goals

  1. Microbiome research studies interactions between microbes (and with humans, the environment…)
  2. Big data requires efficient tools to manipulate the data
  3. miaverse is a SummarizedExperiment framework for microbiome analytics
Figure source: Moreno-Indias et al. (2021) Statistical and Machine Learning Techniques in Human Microbiome Studies: Contemporary Challenges and Solutions. Frontiers in Microbiology 12:11.
Figure source: Moreno-Indias et al. (2021) Statistical and Machine Learning Techniques in Human Microbiome Studies: Contemporary Challenges and Solutions. Frontiers in Microbiology 12:11.

Motivation

Microbiome research

  • A microbiome is the composition of microbes in a well-defined area (gut, skin, mouth…)
  • Bidirectional interaction between the human host and the microbiome → affects both health and disease.
  • The research is based on sequencing (characterization of genes and species).
  • Nowadays, multi-omics approaches are increasingly common (for example, integrating taxonomic information with metabolite data)
  • Computational methods are the new microscope
  • The research has expanded rapidly in recent years
# Plot publication graph (publication counts per year as a bar chart)

# Names of the columns mapped to the x and y axes
x <- "Year"
y <- "Count"

# Read the exported timeline; the first line of the file is skipped
path <- "data/PubMed_Timeline_Results_by_Year.csv"
df <- read.csv(file = path, skip = 1)

# One bar per row, with the bar height taken directly from the y column
plot <- ggplot(data = df, mapping = aes(x = .data[[x]], y = .data[[y]]))
plot <- plot + geom_bar(stat = "identity")
plot
PubMed publications per year for the search term 'microbiome' (fetched: Sep 5, 2023)

PubMed publications per year for the search term ‘microbiome’ (fetched: Sep 5, 2023)

Big data

  • Add here something about cohort studies, big data, how analysis is done with big data, requirements…

miaverse (MIcrobiome Analysis)

The structure of the TreeSummarizedExperiment (TreeSE) class.
The structure of the TreeSummarizedExperiment (TreeSE) class.

The workflow

Importing the dataset

We get the data from the MGnify database, EMBL-EBI’s database for metagenomic data. This large microbiome database can be accessed with the MGnifyR package, which now supports TreeSE. The package will be submitted to Bioconductor’s next release.

We chose this dataset…

As loading takes some time, the dataset is already loaded.

For other available datasets and importing methods, check OMA.

# library(MGnifyR)
# mg <- MgnifyClient()
# 
# analyses <- searchAnalysis(mg, "studies", "MGYS00005128")
# analyses <- searchAnalysis(mg, "studies", "MGYS00000596")
# mae <- getResult(mg, analyses)

Thanks!

  • more material in OMA
  • miaverse logo
  • project website and QR also.
  • Contact info
  • Poster info

Session info

# Print the R version, platform, locale and attached/loaded package versions
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21
## 
## Matrix products: default
## BLAS:   /opt/R/4.3.1/lib/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=fi_FI.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=fi_FI.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=fi_FI.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Helsinki
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] miaViz_1.9.0                   ggraph_2.1.0.9000             
##  [3] mia_1.9.12                     MultiAssayExperiment_1.27.5   
##  [5] TreeSummarizedExperiment_2.9.0 Biostrings_2.69.2             
##  [7] XVector_0.41.1                 SingleCellExperiment_1.23.0   
##  [9] SummarizedExperiment_1.31.1    Biobase_2.61.0                
## [11] GenomicRanges_1.53.1           GenomeInfoDb_1.37.3           
## [13] IRanges_2.35.2                 S4Vectors_0.39.1              
## [15] BiocGenerics_0.47.0            MatrixGenerics_1.13.1         
## [17] matrixStats_1.0.0              ggplot2_3.4.3                 
## [19] BiocManager_1.30.22           
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.15.0           jsonlite_1.8.7             
##   [3] magrittr_2.0.3              ggbeeswarm_0.7.2           
##   [5] farver_2.1.1                rmarkdown_2.24             
##   [7] zlibbioc_1.47.0             vctrs_0.6.3                
##   [9] memoise_2.0.1               DelayedMatrixStats_1.23.4  
##  [11] RCurl_1.98-1.12             ggtree_3.9.1               
##  [13] htmltools_0.5.6             S4Arrays_1.1.5             
##  [15] BiocNeighbors_1.19.0        SparseArray_1.1.11         
##  [17] gridGraphics_0.5-1          sass_0.4.7                 
##  [19] bslib_0.5.1                 plyr_1.8.8                 
##  [21] DECIPHER_2.29.0             cachem_1.0.8               
##  [23] igraph_1.5.1                lifecycle_1.0.3            
##  [25] pkgconfig_2.0.3             rsvd_1.0.5                 
##  [27] Matrix_1.6-1                R6_2.5.1                   
##  [29] fastmap_1.1.1               GenomeInfoDbData_1.2.10    
##  [31] digest_0.6.33               aplot_0.2.0                
##  [33] colorspace_2.1-0            ggnewscale_0.4.9           
##  [35] patchwork_1.1.3             scater_1.29.4              
##  [37] irlba_2.3.5.1               RSQLite_2.3.1              
##  [39] vegan_2.6-4                 beachmat_2.17.15           
##  [41] labeling_0.4.3              fansi_1.0.4                
##  [43] polyclip_1.10-4             abind_1.4-5                
##  [45] mgcv_1.9-0                  compiler_4.3.1             
##  [47] bit64_4.0.5                 withr_2.5.0                
##  [49] BiocParallel_1.35.4         viridis_0.6.4              
##  [51] DBI_1.1.3                   highr_0.10                 
##  [53] ggforce_0.4.1               MASS_7.3-60                
##  [55] DelayedArray_0.27.10        bluster_1.11.4             
##  [57] permute_0.9-7               tools_4.3.1                
##  [59] vipor_0.4.5                 beeswarm_0.4.0             
##  [61] ape_5.7-1                   glue_1.6.2                 
##  [63] nlme_3.1-163                grid_4.3.1                 
##  [65] cluster_2.1.4               reshape2_1.4.4             
##  [67] generics_0.1.3              gtable_0.3.4               
##  [69] tidyr_1.3.0                 BiocSingular_1.17.1        
##  [71] tidygraph_1.2.3             ScaledMatrix_1.9.1         
##  [73] utf8_1.2.3                  ggrepel_0.9.3              
##  [75] pillar_1.9.0                stringr_1.5.0              
##  [77] yulab.utils_0.0.8           splines_4.3.1              
##  [79] dplyr_1.1.2                 tweenr_2.0.2               
##  [81] treeio_1.25.4               lattice_0.21-8             
##  [83] bit_4.0.5                   tidyselect_1.2.0           
##  [85] DirichletMultinomial_1.43.0 scuttle_1.11.2             
##  [87] knitr_1.43                  gridExtra_2.3              
##  [89] xfun_0.40                   graphlayouts_1.0.0         
##  [91] stringi_1.7.12              lazyeval_0.2.2             
##  [93] ggfun_0.1.2                 yaml_2.3.7                 
##  [95] evaluate_0.21               codetools_0.2-19           
##  [97] tibble_3.2.1                ggplotify_0.1.2            
##  [99] cli_3.6.1                   munsell_0.5.0              
## [101] jquerylib_0.1.4             Rcpp_1.0.11                
## [103] parallel_4.3.1              blob_1.2.4                 
## [105] sparseMatrixStats_1.13.4    bitops_1.0-7               
## [107] decontam_1.21.0             viridisLite_0.4.2          
## [109] tidytree_0.4.5              scales_1.2.1               
## [111] purrr_1.0.2                 crayon_1.5.2               
## [113] rlang_1.1.1